#-----------------------------------------------------------------------------#
#### IMPORT ####
#-----------------------------------------------------------------------------#
gc()
library(data.table)
library("lfe")
library(texreg)
library(readstata13)
# library(plm)
library(Hmisc)
library(haven)
library(plyr,include.only="rbind.fill")
# library("didimputation") Borusyak
# library("DIDmultiplegt") Chaisemartin

memory.limit(size=64000)

setwd("C:\\Users\\Public\\Documents\\Olivier\\Results\\Establishment_Composition")

simplelag <- function (x,by=NULL,mylag=1,outside=NA) {
  myend <- length(x)- mylag
  if(!is.null(by))  {
    lby <- c(replicate(mylag,""),as.character(by[1:myend]))
    y0 <- c(replicate(mylag,outside),x[1:myend])
    y <- ifelse(as.character(by)==lby,y0,outside)
  }
  else {
    y <- c(replicate(mylag,outside),x[1:myend])
  }
}

forward <- function (x,by=NULL,myforward=1,outside=NA) {
  mystart <- myforward+1
  if(!is.null(by))  {
    fby <- c(as.character(by[mystart:length(x)],replicate(myforward,"")))
    y0 <- c(x[mystart:length(x)],replicate(myforward,outside))
    y <- ifelse(as.character(by)==fby,y0,outside)
  }
  else {
    y <- c(x[mystart:length(x)],replicate(myforward,outside))
  }
}

retain <- function(x,event,outside=NA)
{
  indices <- c(1,which(event==TRUE),length(x)+1)
  values <- c(outside,x[event %in% TRUE])
  y <- rep(values,diff(indices))
}

obtain <- function(x,event,outside=NA)
{
  indices <- c(0,which(event==TRUE),length(x))
  values <- c(x[event %in% TRUE],outside)
  y <- rep(values,diff(indices))
}

wtd.summary <- function (x,w=NA) {
  name <- deparse(substitute(x))
  nb_obs <- length(x)
  sum_wgt <- sum(w[is.na(x) ==F])
  nb_missing <- sum(is.na(x))
  sum_wgt_missing <- sum(w[is.na(x) ==T])
  wtd_mean <- weighted.mean(x,w=w,na.rm=T)
  wtd_sd <- wtd.var(x,w=w,na.rm=T)**0.5
  min <- min(x,na.rm=T)
  q1 <- wtd.quantile(x,weights=w,probs=0.25)
  q2 <- wtd.quantile(x,weights=w,probs=0.50)
  q3 <- wtd.quantile(x,weights=w,probs=0.75)
  max <- max(x,na.rm=T)
  structure(data.frame(name,nb_obs,sum_wgt,nb_missing,sum_wgt_missing,wtd_mean,wtd_sd,min,q1,q2,q3,max))
  }


#-----------------------------------------------------------------------------#
#### DATA CREATION ####
#-----------------------------------------------------------------------------#

# Already done 
# e94 <- data.table(read_dta("est1994.dta",col_select=c("year","est","f9910","f9099","count")))
# e95 <- data.table(read_dta("est1995.dta",col_select=c("year","est","f9910","f9099","count")))
# e96 <- data.table(read_dta("est1996.dta",col_select=c("year","est","f9910","f9099","count")))
# e97 <- data.table(read_dta("est1997.dta",col_select=c("year","est","f9910","f9099","count")))
# e98 <- data.table(read_dta("est1998.dta",col_select=c("year","est","f9910","f9099","count")))
# e99 <- data.table(read_dta("est1999.dta",col_select=c("year","est","f9910","f9099","count")))
# e00 <- data.table(read_dta("est2000.dta",col_select=c("year","est","f9910","f9099","count")))
# e01 <- data.table(read_dta("est2001.dta",col_select=c("year","est","f9910","f9099","count")))
# e02 <- data.table(read_dta("est2002.dta",col_select=c("year","est","f9910","f9099","count")))
# e03 <- data.table(read_dta("est2003.dta",col_select=c("year","est","f9910","f9099","count")))
# e04 <- data.table(read_dta("est2004.dta",col_select=c("year","est","f9910","f9099","count")))
# e05 <- data.table(read_dta("est2005.dta",col_select=c("year","est","f9910","f9099","count")))
# e06 <- data.table(read_dta("est2006.dta",col_select=c("year","est","f9910","f9099","count")))
# e07 <- data.table(read_dta("est2007.dta",col_select=c("year","est","f9910","f9099","count")))
# e08 <- data.table(read_dta("est2008.dta",col_select=c("year","est","f9910","f9099","count")))
# e09 <- data.table(read_dta("est2009.dta",col_select=c("year","est","f9910","f9099","count")))
# e10 <- data.table(read_dta("est2010.dta",col_select=c("year","est","f9910","f9099","count")))
# e11 <- data.table(read_dta("est2011.dta",col_select=c("year","est","f9910","f9099","count")))
# e12 <- data.table(read_dta("est2012.dta",col_select=c("year","est","f9910","f9099","count")))
# e13 <- data.table(read_dta("est2013.dta",col_select=c("year","est","f9910","f9099","count")))
# e14 <- data.table(read_dta("est2014.dta",col_select=c("year","est","f9910","f9099","count")))
# e15 <- data.table(read_dta("est2015.dta",col_select=c("year","est","f9910","f9099","count")))
# e16 <- data.table(read_dta("est2016.dta",col_select=c("year","est","f9910","f9099","count")))
# e17 <- data.table(read_dta("est2017.dta",col_select=c("year","est","f9910","f9099","count")))
# e18 <- data.table(read_dta("est2018.dta",col_select=c("year","est","f9910","f9099","count")))
# e19 <- data.table(read_dta("est2019.dta",col_select=c("year","est","f9910","f9099","count")))
# 
# ee <- rbind(e94,e95,e96,e97,e98,e99,e00,
#             e01,e02,e03,e04,e05,e06,e07,e08,e09,e10,e11,e12,e13,e14,e15,e16,e17,e18,e19,fill=T)
# rm(e94,e95,e96,e97,e98,e99,e00,
#                    e01,e02,e03,e04,e05,e06,e07,e08,e09,e10,e11,e12,e13,e14,e15,e16,e17,e18,e19)
# ee$firm <- substr(ee$est,1,9)
# ee$min_firm <- ave(ee$year,ee$firm,FUN=min)
# ee$min_est <- ave(ee$year,ee$est,FUN=min)
# saveRDS(ee,"est.rds")
ee<-readRDS("est.rds")
ee <- ee[ee$year>2000 # & ee$year<2018
         ,c("year","est","f9910","f9099","count","min_firm")]

gc()

mm02 <- data.table(read_sas("../Worker_Flows/b2002flows.sas7bdat"))
mm02 <- merge(mm02,ee[ee$year %in% 2001,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm02 <- merge(mm02,ee[ee$year %in% 2002,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)
str(mm02)
table(mm02$mstart>360)

mm03 <- data.table(read_sas("../Worker_Flows/b2003flows.sas7bdat"))
mm03 <- merge(mm03,ee[ee$year %in% 2002,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm03 <- merge(mm03,ee[ee$year %in% 2003,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm04 <- data.table(read_sas("../Worker_Flows/b2004flows.sas7bdat"))
mm04 <- merge(mm04,ee[ee$year %in% 2003,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm04 <- merge(mm04,ee[ee$year %in% 2004,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm05 <- data.table(read_sas("../Worker_Flows/b2005flows.sas7bdat"))
mm05 <- merge(mm05,ee[ee$year %in% 2004,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm05 <- merge(mm05,ee[ee$year %in% 2005,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm06 <- data.table(read_sas("../Worker_Flows/b2006flows.sas7bdat"))
mm06 <- merge(mm06,ee[ee$year %in% 2005,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm06 <- merge(mm06,ee[ee$year %in% 2006,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm07 <- data.table(read_sas("../Worker_Flows/b2007flows.sas7bdat"))
mm07 <- merge(mm07,ee[ee$year %in% 2006,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm07 <- merge(mm07,ee[ee$year %in% 2007,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm08 <- data.table(read_sas("../Worker_Flows/b2008flows.sas7bdat"))
mm08 <- merge(mm08,ee[ee$year %in% 2007,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm08 <- merge(mm08,ee[ee$year %in% 2008,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm09 <- data.table(read_sas("../Worker_Flows/b2009flows.sas7bdat"))
mm09 <- merge(mm09,ee[ee$year %in% 2008,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm09 <- merge(mm09,ee[ee$year %in% 2009,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm10 <- data.table(read_sas("../Worker_Flows/b2010flows.sas7bdat"))
mm10 <- merge(mm10,ee[ee$year %in% 2009,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm10 <- merge(mm10,ee[ee$year %in% 2010,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm11 <- data.table(read_sas("../Worker_Flows/b2011flows.sas7bdat"))
mm11 <- merge(mm11,ee[ee$year %in% 2010,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm11 <- merge(mm11,ee[ee$year %in% 2011,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm12 <- data.table(read_sas("../Worker_Flows/b2012flows.sas7bdat"))
mm12 <- merge(mm12,ee[ee$year %in% 2011,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm12 <- merge(mm12,ee[ee$year %in% 2012,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm13 <- data.table(read_sas("../Worker_Flows/b2013flows.sas7bdat"))
mm13 <- merge(mm13,ee[ee$year %in% 2012,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm13 <- merge(mm13,ee[ee$year %in% 2013,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm14 <- data.table(read_sas("../Worker_Flows/b2014flows.sas7bdat"))
mm14 <- merge(mm14,ee[ee$year %in% 2013,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm14 <- merge(mm14,ee[ee$year %in% 2014,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm15 <- data.table(read_sas("../Worker_Flows/b2015flows.sas7bdat"))
mm15 <- merge(mm15,ee[ee$year %in% 2014,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm15 <- merge(mm15,ee[ee$year %in% 2015,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm16 <- data.table(read_sas("../Worker_Flows/b2016flows.sas7bdat"))
mm16 <- merge(mm16,ee[ee$year %in% 2015,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm16 <- merge(mm16,ee[ee$year %in% 2016,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm17 <- data.table(read_sas("../Worker_Flows/b2017flows.sas7bdat"))
mm17 <- merge(mm17,ee[ee$year %in% 2016,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm17 <- merge(mm17,ee[ee$year %in% 2017,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)

mm18 <- data.table(read_sas("../Worker_Flows/b2018flows.sas7bdat"))
mm18 <- merge(mm18,ee[ee$year %in% 2017,c("est","count","min_firm")],by.x="lsiret",by.y="est",all.x=T)
mm18 <- merge(mm18,ee[ee$year %in% 2018,c("est","count","min_firm")],by.x="siret",by.y="est",all.x=T)


mm <- rbind(mm02,mm03,mm04,mm05,mm06,mm07,mm08,mm09,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18,fill=T)
rm(mm02,mm03,mm04,mm05,mm06,mm07,mm08,mm09,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18)
gc()

# ee <- rbind(e01,e02,e03,e04,e05,e06,e07,e08,e09,e10,e11,e12,e13,e14,e15,e16,e17,fill=T)
# rm(e01,e02,e03,e04,e05,e06,e07,e08,e09,e10,e11,e12,e13,e14,e15,e16,e17)
gc()

mm$flow_int <- (mm$NBTMOB2>5)*mm$NBTMOB2
mm$flow_sub <- (mm$NBTMOB2>5 & ((mm$year-mm$min_firm.y)<4))*mm$NBTMOB2
mm$flow_sub <- ifelse(is.na(mm$flow_sub),0,mm$flow_sub)
summary(mm$flow_sub)
mm$flow <- (mm$NBTMOB3-mm$NBTMOB31-mm$NBTMOB32-mm$NBTMOB33)+mm$NBTMOB4+(mm$NBTMOB5-mm$NBTMOB51)
mm[,c("NBTMOB1","NBTMOB2","NBTMOB3","NBTMOB31","NBTMOB32","NBTMOB33","NBTMOB4","NBTMOB5","NBTMOB51","min_firm.x","min_firm.y"):=NULL] #Deleting variables

mm <- mm[mm$flow>0 | mm$flow_int>0,]

mm$dyear <- mm$year-1 # FULL YEAR DEPARTURE IN T-1 
# IF Year=2005, departure in t-1 then dyear=2004.

# mm$ayear <- mm$year+1 # FULL YEAR ARRIVAL IN T+1
# mm$ayear <- mm$year # FULL YEAR ARRIVAL IN T
 
mm$ayear <- ifelse(mm$mstart>359,mm$year+1,mm$year) # FULL YEAR ARRIVAL IN T or T+1 depending on average start day




mm <- merge(mm,ee[,c("est","year")],by.x=c("lsiret","dyear"),by.y=c("est","year"))
mm <- mm[order(mm$siret,mm$lsiret,mm$dyear),]
rownames(mm) <- NULL
mm$ldyear <- simplelag(mm$dyear,paste(mm$siret,mm$lsiret),outside=NA)
mm$drop <- ifelse(is.na(mm$ldyear)==F & mm$dyear %in% mm$ldyear+1,1,0)
nrow(mm)
mm <- mm[mm$drop %in% 0, ]
nrow(mm)

#Aggregation of flows between firms 
#mm$firm_flow <- ave(mm$flow,paste(substr(mm$lsiret,1,9),substr(mm$siret,1,9)),FUN=sum)
mm$key <- paste(mm$dyear, substr(mm$lsiret,1,9),substr(mm$siret,1,9))
mm[,`:=` (firm_flow=sum(flow)),by=key] # adding flow firm
mm[,c("key"):=NULL] #Deleting variables
gc()

#Reminder 
#Malgouyres #Naf 2003

#High Skill outsourcing 
# 72 IT services
# 74.1 Admin services, management consulting
# 74.4 Advertising
# 74.5 HR services

#Low Skill outsourcing 
# 74.6 Security
# 74.7 Cleaning
# 60.2 Urban and road transportation
# 63.1 Maintenance and storage
# 63.4 Logistics of merchandise transportation


# Goldschmitt & Schneider Outsourcing event
# 10 employees flow from one ESTABLISHMENT not in Logistics, food services, cleaning or security OR TEMP
# to another ESTABLISHMENT in  Logistics, food services, cleaning or security 
# predecessor establishment 50 employees
# continue to exist following year
# flow < 30% predecessor establishment workforce

mm$lapet_1 <- ifelse(mm$year>=2008,"",mm$lapet_1)
mm$apet <- ifelse(mm$year>=2008,"",mm$apet)

mm$lind_1 <- ifelse(mm$year<2008,"",mm$lind_1)
mm$ind <- ifelse(mm$year<2008,"",mm$ind)

mm$gs <- mm$flow>9 & mm$lsize_est>49 & mm$flow/mm$lsize_est<0.3 # + existence condition in time T

#Low skill outsourcing
#Transportation
mm$outs_49 <- ( (substr(mm$lind_1,1,3) %in% c("493","494") 
                 | substr(mm$lapet_1,1,3) %in% c("602"))==F
               & (substr(mm$ind,1,3) %in% c("493","494") 
                  | substr(mm$apet,1,3) %in% c("602"))
               )*mm$flow



#Logistics
mm$outs_52 <- ((substr(mm$lind_1,1,3) %in% c("521","522") 
                 | substr(mm$lapet_1,1,3) %in% c("631","634"))==F
                & (substr(mm$ind,1,3) %in% c("521","522") 
                   | substr(mm$apet,1,3) %in% c("631","634"))
)*mm$flow



#Restauration
mm$outs_56 <- ((substr(mm$lind_1,1,2) %in% c("56") 
                | substr(mm$lapet_1,1,2) %in% c("55"))==F
               & (substr(mm$ind,1,2) %in% c("56") 
                | substr(mm$apet,1,2) %in% c("55")))*mm$flow 



#Security
mm$outs_80 <- ((substr(mm$lind_1,1,2) %in% c("80") 
                 | substr(mm$lapet_1,1,3) %in% c("746"))==F
                & (substr(mm$ind,1,2) %in% c("80") 
                   | substr(mm$apet,1,3) %in% c("746")))*mm$flow 



#Cleaning
mm$outs_81 <- ((substr(mm$lind_1,1,3) %in% c("812") 
                | substr(mm$lapet_1,1,3) %in% c("747"))==F
               & (substr(mm$ind,1,3) %in% c("812") 
                  | substr(mm$apet,1,3) %in% c("747")))*mm$flow 

mm$outs_lsk <- mm$outs_49+mm$outs_52+mm$outs_56+mm$outs_80+mm$outs_81

tapply((mm$outs_lsk>10),mm$dyear,sum,na.rm=T)
tapply((mm$outs_hsk>10),mm$dyear,sum,na.rm=T)

table(mm$outs_lsk)
#High skill outsourcing

#High Skill outsourcing 
# 72 IT services
# 74.1 Admin services, management consulting
# 74.4 Advertising
# 74.5 HR services (but this is temp work... Not outsourcing for me)

#Computer
mm$outs_62 <- ((substr(mm$lind_1,1,2) %in% c("62") 
                | substr(mm$lapet_1,1,2) %in% c("72"))==F
               & (substr(mm$ind,1,2) %in% c("62") 
                  | substr(mm$apet,1,2) %in% c("72")))*mm$flow 

# Legal and accounting consulting
mm$outs_69 <- ((substr(mm$lind_1,1,3) %in% c("69","70") 
                | substr(mm$lapet_1,1,3) %in% c("741"))==F
               & (substr(mm$ind,1,3) %in% c("69","70") 
                  | substr(mm$apet,1,3) %in% c("741")))*mm$flow 

#Technical consulting
mm$outs_71 <- ((substr(mm$lind_1,1,2) %in% c("71") 
                | substr(mm$lapet_1,1,3) %in% c("742"))==F
               & (substr(mm$ind,1,2) %in% c("71") 
                  | substr(mm$apet,1,3) %in% c("742")))*mm$flow 
#Advertising
mm$outs_73 <- ((substr(mm$lind_1,1,2) %in% c("73") 
                | substr(mm$lapet_1,1,3) %in% c("744"))==F
               & (substr(mm$ind,1,3) %in% c("7.") 
                  | substr(mm$apet,1,2) %in% c("744")))*mm$flow 


# A mix with call centers and Xerox which are neither low or high skill
# #Administrative support to firms
# mm$outs_82 <- ((substr(mm$lind_1,1,2) %in% c("82") 
#                 | substr(mm$lapet_1,1,3) %in% c("748"))==F
#                & (substr(mm$ind,1,2) %in% c("82") 
#                   | substr(mm$apet,1,3) %in% c("748")))*mm$flow 
# 


mm$outs_hsk <- mm$outs_62 + mm$outs_69 + mm$outs_71 + mm$outs_73

str(mm)
saveRDS(mm,"flow.rds")
mm <- readRDS("flow.rds")
# fwrite(mm,"flow.csv")

gc()
str(mm)

mm$key <- paste(mm$lsiret,mm$dyear)
mmm <- mm[,.(outs_lsk=sum(outs_lsk*(outs_lsk>5)),
             outs_hsk=sum(outs_hsk*(outs_hsk>5)),
             outs_49=sum(outs_49*(outs_49>5)),
             outs_52=sum(outs_52*(outs_52>5)),
             outs_56=sum(outs_56*(outs_56>5)),
             outs_80=sum(outs_80*(outs_80>5)),
             outs_81=sum(outs_81*(outs_81>5)),
             depflow_int=sum(flow_int),
             depflow_sub=sum(flow_sub),
             depsize=min(lsize_est),
             deparrsize=(mean(lsize_est)+mean(size_est))/2,
             lsiret=min(lsiret),
             dyear=min(dyear)),by=key]
str(mmm)


mmm[,c("key"):=NULL] #Deleting variables


mm$key <- paste(mm$siret,mm$ayear)
aaa <- mm[,.(arr_lsk=sum(outs_lsk*(outs_lsk>5)),
             arr_hsk=sum(outs_hsk*(outs_hsk>5)),
             arr_49=sum(outs_49*(outs_49>5)),
             arr_52=sum(outs_52*(outs_52>5)),
             arr_56=sum(outs_56*(outs_56>5)),
             arr_80=sum(outs_80*(outs_80>5)),
             arr_81=sum(outs_81*(outs_81>5)),
             arrflow_int=sum(flow_int),
             arrflow_sub=sum(flow_sub),
             arrsize=min(size_est),
             arrdepsize=(mean(lsize_est)+mean(size_est))/2,
             siret=min(siret),
             ayear=min(ayear)),by=key]
str(aaa)

aaa[,c("key"):=NULL] #Deleting variables


ee <- merge(ee,aaa[,c("siret","ayear","arr_lsk","arrsize","arrdepsize","arrflow_int","arrflow_sub")],by.x=c("est","year"),by.y=c("siret","ayear"),all.x=T)
ee <- merge(ee,mmm,by.x=c("est","year"),by.y=c("lsiret","dyear"),all.x=T)
str(ee)
ee[,c("key"):=NULL] #Deleting variables
str(ee)
gc()
rm(aaa,mm,mmm)
ee$outs_49[is.na(ee$outs_49)] <- 0
ee$outs_52[is.na(ee$outs_52)] <- 0
ee$outs_56[is.na(ee$outs_56)] <- 0
ee$outs_80[is.na(ee$outs_80)] <- 0
ee$outs_81[is.na(ee$outs_81)] <- 0
ee$outs_lsk[is.na(ee$outs_lsk)] <- 0
ee$arr_lsk[is.na(ee$arr_lsk)] <- 0
ee$outs_hsk[is.na(ee$outs_hsk)] <- 0
ee$depflow_int[is.na(ee$depflow_int)] <- 0
ee$depflow_sub[is.na(ee$depflow_sub)] <- 0
ee$arrflow_int[is.na(ee$arrflow_int)] <- 0
ee$arrflow_sub[is.na(ee$arrflow_sub)] <- 0


ee$f9010 <- ee$f9910+ee$f9099
ee$f9010xf9010 <- ifelse(ee$f9010>0,(ee$f9010-1)/(ee$count-1),NA)
ee$f9910xf9910 <- ifelse(ee$f9910>0,(ee$f9910-1)/(ee$count-1),NA)
ee$one <- 1

ee$sf9010 <- ave(ee$f9010,ee$year,FUN=sum)
ee$f9010_w <- ee$f9010/ee$sf9010


ee$sf9910 <- ave(ee$f9910,ee$year,FUN=sum)
ee$f9910_w <- ee$f9910/ee$sf9910
ee$lnbwkrs <- ifelse(ee$count>0,log(ee$count),NA)

ee <- ee[order(ee$est,ee$year),]
ee$lcount <- simplelag(ee$count,by=ee$est)
ee$lndnbwkrs_neg <- (log(ee$count)-log(ee$lcount))*((log(ee$count)-log(ee$lcount))<=0)
ee$lndnbwkrs_neg <- ifelse(is.na(ee$lndnbwkrs_neg),log(ee$count),ee$lndnbwkrs_neg)
ee$lnnbwkrs_cumneg <- ave(ee$lndnbwkrs_neg,ee$est,FUN=function(x) cumsum(x)) 

# wtd.summary(ee$f9010xf9010[ee$year==2014],w=ee$f9010_w[ee$year==2014])
# str(ee)
# table(ee$year[ee$outs_lsk>0])
# summary(is.na(ee$f9010xf9010))
# sd(ee$f9010xf9010[ee$year==2014],na.rm=T)

ee[,c("sf9910","f9910_w","f9910xf9910",
      "f9099","f9910","f9010","sf9010","lndnbwkrs_neg",
      "lcount","lnbwkrs","one"):=NULL] #Deleting variables

ee$arr_lsk[is.na(ee$arr_lsk)] <- 0
ee$yarr_lsk <- ave(ifelse((ee$arr_lsk>0)*ee$year>0,(ee$arr_lsk>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_lsk <- ave(ifelse((ee$outs_lsk>0)*ee$year>0,(ee$outs_lsk>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_hsk <- ave(ifelse((ee$outs_hsk>0)*ee$year>0,(ee$outs_hsk>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_49 <- ave(ifelse((ee$outs_49>0)*ee$year>0,(ee$outs_49>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_52 <- ave(ifelse((ee$outs_52>0)*ee$year>0,(ee$outs_52>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_56 <- ave(ifelse((ee$outs_56>0)*ee$year>0,(ee$outs_56>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_80 <- ave(ifelse((ee$outs_80>0)*ee$year>0,(ee$outs_80>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$youts_81 <- ave(ifelse((ee$outs_81>0)*ee$year>0,(ee$outs_81>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))

ee$nouts_lsk <- ave((ee$outs_lsk>0),ee$est,FUN=sum)
ee$y2outs_lsk <- ave(ifelse(ee$year==ee$youts_lsk,10000,ifelse((ee$outs_lsk>0)*ee$year>0,(ee$outs_lsk>0)*ee$year,10000)),ee$est,FUN=min)
ee$y2arr_lsk <- ave(ifelse(ee$year==ee$yarr_lsk,10000,ifelse((ee$arr_lsk>0)*ee$year>0,(ee$arr_lsk>0)*ee$year,10000)),ee$est,FUN=min)

# ee$firm <- substr(ee$est,1,9)
# gc()
# 
# ee$firm <- substr(ee$est,1,9)

saveRDS(ee,"outsourcing.rds")


#----------------------------------------------------------------------------------------------#
#### DATA RESTART ####
#----------------------------------------------------------------------------------------------#
rm(ee,dd)
gc()
ee <- readRDS("outsourcing.rds")
str(ee)

ee[,c("sf9910","f9910_w","f9910xf9910",
      "f9099","f9910","f9010","sf9010","lndnbwkrs_neg",
      "lcount","lnbwkrs","one"):=NULL] #Deleting variables

# ee <- ee[ee$f9010_w>0 & is.na(ee$f9010_w)==F,]
ee$routs_lsk <- ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_lsk/ee$depsize)
ee$outs_lsk_cum <- ave(ee$outs_lsk>0,ee$est,FUN=function(x) cumsum(x))
ee$routs_lsk_cum <- ave(ee$routs_lsk,ee$est,FUN=function(x) cumsum(x)) 


ee$routs_arr_lsk <- ifelse(is.na(ee$deparrsize)==T | ee$deparrsize==0,
                           ifelse(is.na(ee$arrdepsize)==T | ee$arrdepsize==0,0,ee$arr_lsk/ee$arrdepsize),
                           ee$outs_lsk/ee$deparrsize)
ee$yrouts_lsk <- ave(ee$routs_lsk*(ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
ee$yrouts_arr_lsk <- ave(ee$routs_arr_lsk*(pmin(ee$youts_lsk,ee$yarr_lsk)==10000 | ee$year<pmin(ee$y2outs_lsk,ee$y2arr_lsk)),ee$est,FUN=max)


ee$ydep_sub <- ave(ifelse((ee$depflow_sub>0)*ee$year>0,(ee$depflow_sub>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$yarr_sub <- ave(ifelse((ee$arrflow_sub>0)*ee$year>0,(ee$arrflow_sub>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$y2arr_sub <- ave(ifelse(ee$year==ee$yarr_sub,10000,ifelse((ee$arrflow_sub>0)*ee$year>0,(ee$arrflow_sub>0)*ee$year,10000)),ee$est,FUN=min)
ee$y2dep_sub <- ave(ifelse(ee$year==ee$ydep_sub,10000,ifelse((ee$depflow_sub>0)*ee$year>0,(ee$depflow_sub>0)*ee$year,10000)),ee$est,FUN=min)
ee$rdep_sub <- ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$depflow_sub/ee$depsize)
ee$rdep_arr_sub <- ifelse(is.na(ee$deparrsize)==T | ee$deparrsize==0,
                          ifelse(is.na(ee$arrdepsize)==T | ee$arrdepsize==0,0,ee$arrflow_sub/ee$arrdepsize),
                          ee$depflow_sub/ee$deparrsize)
ee$yrdep_sub <- ave(ee$rdep_sub*(ee$ydep_sub==10000 | ee$year<ee$y2dep_sub),ee$est,FUN=max)
ee$yrdep_arr_sub <- ave(ee$rdep_arr_sub*(pmin(ee$ydep_sub,ee$yarr_sub)==10000 | ee$year<pmin(ee$y2dep_sub,ee$y2arr_sub)),ee$est,FUN=max)



ee$ydep_int <- ave(ifelse((ee$depflow_int>0)*ee$year>0,(ee$depflow_int>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$yarr_int <- ave(ifelse((ee$arrflow_int>0)*ee$year>0,(ee$arrflow_int>0)*ee$year,10000),ee$est,FUN=function(x) min(x,na.rm=T))
ee$y2arr_int <- ave(ifelse(ee$year==ee$yarr_int,10000,ifelse((ee$arrflow_int>0)*ee$year>0,(ee$arrflow_int>0)*ee$year,10000)),ee$est,FUN=min)
ee$y2dep_int <- ave(ifelse(ee$year==ee$ydep_int,10000,ifelse((ee$depflow_int>0)*ee$year>0,(ee$depflow_int>0)*ee$year,10000)),ee$est,FUN=min)
ee$rdep_int <- ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$depflow_int/ee$depsize)
ee$rdep_arr_int <- ifelse(is.na(ee$deparrsize)==T | ee$deparrsize==0,
                          ifelse(is.na(ee$arrdepsize)==T | ee$arrdepsize==0,0,ee$arrflow_int/ee$arrdepsize),
                          ee$depflow_int/ee$deparrsize)
ee$yrdep_int <- ave(ee$rdep_int*(ee$ydep_int==10000 | ee$year<ee$y2dep_int),ee$est,FUN=max)
ee$temp <- (pmin(ee$ydep_int,ee$yarr_int)==10000 | ee$year<pmin(ee$y2dep_int,ee$y2arr_int))
ee$yrdep_arr_int <- ave(ee$rdep_arr_int*ee$temp,ee$est,FUN=max)
ee[,c("temp"):=NULL] #Deleting variables

gc()


#----------------------------------------------------------------------------------------------#
#### Descriptives 1. ####
#----------------------------------------------------------------------------------------------#
descum1 <-  table(ee$outs_lsk_cum)
descum2 <-  table(ee$year,ee$outs_lsk_cum)

# There is 2644 1st lsk_event, 44 2nd lsk event and 11 3rd lsk event
descum1
descum2


hsk <- data.frame(table(ee$year[ee$outs_hsk>0]))
lsk <- data.frame(table(ee$year[ee$outs_lsk>0]))
lsk_n <- data.frame(tapply(ee$outs_lsk,ee$year,sum))
lsk_n$Var1 <- c(2001:2019)
lsk_r <- data.frame(tapply(ee$routs_lsk,ee$year,mean))
lsk_r$Var1 <- c(2001:2019)
o49 <- data.frame(table(ee$year[ee$outs_49>0]))
o52 <- data.frame(table(ee$year[ee$outs_52>0]))
o56 <- data.frame(table(ee$year[ee$outs_56>0]))
o80 <- data.frame(table(ee$year[ee$outs_80>0]))
o81 <- data.frame(table(ee$year[ee$outs_81>0]))
depsub <- data.frame(table(ee$year[ee$depflow_sub>0]))
depint <- data.frame(table(ee$year[ee$depflow_int>0]))
count <- data.frame(table(ee$year[ee$count>0]))
lsk_90 <- data.frame(table(ee$year[ee$outs_lsk>0 & ee$f9010_w>0]))
count_90 <- data.frame(table(ee$year[ee$count>0 & ee$f9010_w>0]))

all<-merge(hsk,lsk,by="Var1",all.x=T)
colnames(all)<-c("Var1","hsk","lsk")

all<-merge(all,lsk_n[c(1:17),],by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("lsk_n")

all<-merge(all,lsk_r[c(1:17),],by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("lsk_r")

all<-merge(all,o49,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("o49")

all<-merge(all,o52,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("o52")

all<-merge(all,o56,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("o56")

all<-merge(all,o80,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("o80")

all<-merge(all,o81,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("o81")


all<-merge(all,depsub,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("depsub")

all<-merge(all,depint,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("depint")

all<-merge(all,count,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("count")

all<-merge(all,lsk_90,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("lsk_90")

all<-merge(all,count_90,by="Var1",all.x=T)
colnames(all)[ncol(all)]<-c("count_90")


all


write.csv(all,"evo_outsourcing.csv")

gc()

#----------------------------------------------------------------------------------------------#
#### Descriptives 2. ####
#----------------------------------------------------------------------------------------------#
ee <- ee[ee$f9010_w>0 & is.na(ee$f9010_w)==F,]
ee$firm <- substr(ee$est,1,9)
gc()
field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) & 
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F
field2 <- field & ee$year>2000  & ee$year<2018

ee$lnbwkrs<-log(ee$count)

d1 <- wtd.summary(ee$f9010xf9010[field2],w=ee$f9010_w[field2])
d2 <- wtd.summary(ee$lnbwkrs[field2],w=ee$f9010_w[field2])
d3 <- wtd.summary(ee$lnnbwkrs_cumneg[field2],w=ee$f9010_w[field2])
d4 <- wtd.summary(((ee$outs_lsk[field2]>0)*1),w=ee$f9010_w[field2])
d5 <- wtd.summary(((ee$arr_lsk[field2]>0)*1),w=ee$f9010_w[field2])
d6 <- wtd.summary(((ee$outs_hsk[field2]>0)*1),w=ee$f9010_w[field2])
d7 <- wtd.summary(((ee$outs_49[field2]>0)*1),w=ee$f9010_w[field2])
d8 <- wtd.summary(((ee$outs_52[field2]>0)*1),w=ee$f9010_w[field2])
d9 <- wtd.summary(((ee$outs_56[field2]>0)*1),w=ee$f9010_w[field2])
d10 <- wtd.summary(((ee$outs_80[field2]>0)*1),w=ee$f9010_w[field2])
d11 <- wtd.summary(((ee$outs_81[field2]>0)*1),w=ee$f9010_w[field2])
d12 <- wtd.summary(ee$routs_lsk[field2],w=ee$f9010_w[field2])
d13 <- wtd.summary(ee$routs_arr_lsk[field2],w=ee$f9010_w[field2])
d14 <- wtd.summary(ee$f9010_w[field2],w=ee$f9010_w[field2])
d15 <- wtd.summary(((ee$depflow_sub[field2]>0)*1),w=ee$f9010_w[field2])
d16 <- wtd.summary(((ee$depflow_int[field2]>0)*1),w=ee$f9010_w[field2])
d17 <- wtd.summary(ee$rdep_arr_sub[field2],w=ee$f9010_w[field2])
d18 <- wtd.summary(ee$rdep_arr_int[field2],w=ee$f9010_w[field2])

des_outsourc_flow <- rbind(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18) 
rownames(des_outsourc_flow) <- NULL
des_outsourc_flow
write.csv(des_outsourc_flow,"des_outsourc_flow.csv")

#------------------------------------#
#### Event study design ####
#------------------------------------#

#### Without cumulative dummies ####
gc()

# base null linear trend model
field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) & 
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F & ee$year>2000  # & ee$year<2016

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_0.html",digits=5)

# base null  model
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            # + I((year - youts_lsk) %in%  -4 )
            # + I((year - youts_lsk) %in% -3 )
            # + I((year - youts_lsk) %in% -2 )
            # # + I((year - youts_lsk) %in% -1 )
            # + I((year - youts_lsk) %in% 0 )
            # + I((year - youts_lsk) %in% 1 )
            # + I((year - youts_lsk) %in% 2 )
            # + I((year - youts_lsk) %in% 3 )
            # + I((year - youts_lsk)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_0b.html")


# Base model
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_lsk) %in%  -4 )
            + I((year - youts_lsk) %in% -3 )
            + I((year - youts_lsk) %in% -2 )
            # + I((year - youts_lsk) %in% -1 )
            + I((year - youts_lsk) %in% 0 )
            + I((year - youts_lsk) %in% 1 )
            + I((year - youts_lsk) %in% 2 )
            + I((year - youts_lsk) %in% 3 )
            + I((year - youts_lsk)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_1.html")


# Base model + size
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_lsk)  %in% -4)
            + I((year - youts_lsk) %in% -3 )
            + I((year - youts_lsk) %in% -2 )
            # + I((year - youts_lsk) %in% -1 )
            + I((year - youts_lsk) %in% 0 )
            + I((year - youts_lsk) %in% 1 )
            + I((year - youts_lsk) %in% 2 )
            + I((year - youts_lsk) %in% 3 )
            + I((year - youts_lsk)  %in% 4 )
            + log(count)
            + lnnbwkrs_cumneg
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_size.html")


# Base model magnitude
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_lsk*((year - youts_lsk) %in%  -4 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% -3 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% -2 ))
            # + I(yrouts_lsk*((year - youts_lsk) %in% -1 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% 0 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% 1 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% 2 ))
            + I(yrouts_lsk*((year - youts_lsk) %in% 3 ))
            + I(yrouts_lsk*((year - youts_lsk)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rlsk_1.html")

# Base model magnitude + size
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
           + I(yrouts_lsk*((year - youts_lsk) %in%  -4 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% -3 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% -2 ))
           # + I(yrouts_lsk*((year - youts_lsk) %in% -1 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 0 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 1 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 2 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 3 ))
           + I(yrouts_lsk*((year - youts_lsk)  %in% 4 ))
           + log(count)
           + lnnbwkrs_cumneg
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rlsk_size.html")



#### Trend analysis ####
# base null linear trend model
field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) & 
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F & ee$year>2000  # & ee$year<2016

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_0.html",digits=5)

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
           + I((year - youts_lsk) %in%  -4 )
           + I((year - youts_lsk) %in% -3 )
           + I((year - youts_lsk) %in% -2 )
           # + I((year - youts_lsk) %in% -1 )
           + I((year - youts_lsk) %in% 0 )
           + I((year - youts_lsk) %in% 1 )
           + I((year - youts_lsk) %in% 2 )
           + I((year - youts_lsk) %in% 3 )
           + I((year - youts_lsk)  %in% 4 )
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(dd,stars=c(0.1,0.05,0.01),digits=5,file="twfe_lsk_trend.html")

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
           + I(yrouts_lsk*((year - youts_lsk) %in%  -4 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% -3 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% -2 ))
           # + I(yrouts_lsk*((year - youts_lsk) %in% -1 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 0 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 1 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 2 ))
           + I(yrouts_lsk*((year - youts_lsk) %in% 3 ))
           + I(yrouts_lsk*((year - youts_lsk)  %in% 4 ))
           # + log(count)
           # + lnnbwkrs_cumneg
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(dd,stars=c(0.1,0.05,0.01),digits=5,file="twfe_rlsk_trend.html")



fieldz <- ee$f9010_w>0 & is.na(ee$f9010_w)==F & ee$year %in% c(2001:2017)
  # (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) & 
  # (ee$year-ee$youts_lsk)<5 &
  # (ee$year-ee$youts_lsk) %in% c(-20:-5)==F & ee$year>2000  # & ee$year<2016

rm(dd)
table(ee$youts_lsk)
dd <- felm(I(100*f9010xf9010) ~ year
           |est|0|firm, 
           weights=ee$f9010_w[fieldz],
           data=ee[fieldz,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(list(dd),stars=c(0.1,0.05,0.01),digits=5,file="twfe_lsk_base_trend_2001-2017.html")

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
           + outs_lsk_cum
           |est|0|firm, 
           weights=ee$f9010_w[fieldz],
           data=ee[fieldz,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(list(dd),stars=c(0.1,0.05,0.01),digits=5,file="twfe_cum_lsk_trend_2001-2017.html")

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ year
           + routs_lsk_cum
           |est|0|firm, 
           weights=ee$f9010_w[fieldz],
           data=ee[fieldz,])
screenreg(list(dd),stars=c(0.1,0.05,0.01),digits=5)
htmlreg(list(dd),stars=c(0.1,0.05,0.01),digits=5,file="twfe_cum_rlsk_trend_2001-2017.html")


#### outsourcing per sector ####
# 49
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_49)  %in% -4 )
            + I((year - youts_49) %in% -3 )
            + I((year - youts_49) %in% -2 )
            # + I((year - youts_49) %in% -1 )
            + I((year - youts_49) %in% 0 )
            + I((year - youts_49) %in% 1 )
            + I((year - youts_49) %in% 2 )
            + I((year - youts_49) %in% 3 )
            + I((year - youts_49)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_49.html")


ee$yrouts_49 <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_49/ee$depsize)*(ee$youts_49==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_49*((year - youts_49)  %in% -4 ))
            + I(yrouts_49*((year - youts_49) %in% -3 ))
            + I(yrouts_49*((year - youts_49) %in% -2 ))
            # + I((year - youts_49) %in% -1 )
            + I(yrouts_49*((year - youts_49) %in% 0 ))
            + I(yrouts_49*((year - youts_49) %in% 1 ))
            + I(yrouts_49*((year - youts_49) %in% 2 ))
            + I(yrouts_49*((year - youts_49) %in% 3 ))
            + I(yrouts_49*((year - youts_49)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_r49.html")
ee[,c("yrouts_49","youts_49"):=NULL] #Deleting variables


rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_52)  %in% -4 )
            + I((year - youts_52) %in% -3 )
            + I((year - youts_52) %in% -2 )
            # + I((year - youts_52) %in% -1 )
            + I((year - youts_52) %in% 0 )
            + I((year - youts_52) %in% 1 )
            + I((year - youts_52) %in% 2 )
            + I((year - youts_52) %in% 3 )
            + I((year - youts_52)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_52.html")

ee$yrouts_52 <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_52/ee$depsize)*(ee$youts_52==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_52*((year - youts_52)  %in% -4 ))
            + I(yrouts_52*((year - youts_52) %in% -3 ))
            + I(yrouts_52*((year - youts_52) %in% -2 ))
            # + I((year - youts_52) %in% -1 )
            + I(yrouts_52*((year - youts_52) %in% 0 ))
            + I(yrouts_52*((year - youts_52) %in% 1 ))
            + I(yrouts_52*((year - youts_52) %in% 2 ))
            + I(yrouts_52*((year - youts_52) %in% 3 ))
            + I(yrouts_52*((year - youts_52)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_r52.html")
ee[,c("yrouts_52","youts_52"):=NULL] #Deleting variables

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_56) %in% -4)
            + I((year - youts_56) %in% -3 )
            + I((year - youts_56) %in% -2 )
            # + I((year - youts_56) %in% -1 )
            + I((year - youts_56) %in% 0 )
            + I((year - youts_56) %in% 1 )
            + I((year - youts_56) %in% 2 )
            + I((year - youts_56) %in% 3 )
            + I((year - youts_56) %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_56.html")

ee$yrouts_56 <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_56/ee$depsize)*(ee$youts_56==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_56*((year - youts_56)  %in% -4 ))
            + I(yrouts_56*((year - youts_56) %in% -3 ))
            + I(yrouts_56*((year - youts_56) %in% -2 ))
            # + I((year - youts_56) %in% -1 )
            + I(yrouts_56*((year - youts_56) %in% 0 ))
            + I(yrouts_56*((year - youts_56) %in% 1 ))
            + I(yrouts_56*((year - youts_56) %in% 2 ))
            + I(yrouts_56*((year - youts_56) %in% 3 ))
            + I(yrouts_56*((year - youts_56)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_r56.html")
ee[,c("yrouts_56","youts_56"):=NULL] #Deleting variables

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_80)  %in% -4 )
            + I((year - youts_80) %in% -3 )
            + I((year - youts_80) %in% -2 )
            # + I((year - youts_80) %in% -1 )
            + I((year - youts_80) %in% 0 )
            + I((year - youts_80) %in% 1 )
            + I((year - youts_80) %in% 2 )
            + I((year - youts_80) %in% 3 )
            + I((year - youts_80)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_80.html")

ee$yrouts_80 <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_80/ee$depsize)*(ee$youts_80==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_80*((year - youts_80)  %in% -4 ))
            + I(yrouts_80*((year - youts_80) %in% -3 ))
            + I(yrouts_80*((year - youts_80) %in% -2 ))
            # + I((year - youts_80) %in% -1 )
            + I(yrouts_80*((year - youts_80) %in% 0 ))
            + I(yrouts_80*((year - youts_80) %in% 1 ))
            + I(yrouts_80*((year - youts_80) %in% 2 ))
            + I(yrouts_80*((year - youts_80) %in% 3 ))
            + I(yrouts_80*((year - youts_80)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_r80.html")
ee[,c("yrouts_80","youts_80"):=NULL] #Deleting variables


rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_81)  %in% -4 )
            + I((year - youts_81) %in% -3 )
            + I((year - youts_81) %in% -2 )
            # + I((year - youts_81) %in% -1 )
            + I((year - youts_81) %in% 0 )
            + I((year - youts_81) %in% 1 )
            + I((year - youts_81) %in% 2 )
            + I((year - youts_81) %in% 3 )
            + I((year - youts_81) %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_81.html")

ee$yrouts_81 <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_81/ee$depsize)*(ee$youts_81==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_81*((year - youts_81)  %in% -4 ))
            + I(yrouts_81*((year - youts_81) %in% -3 ))
            + I(yrouts_81*((year - youts_81) %in% -2 ))
            # + I((year - youts_81) %in% -1 )
            + I(yrouts_81*((year - youts_81) %in% 0 ))
            + I(yrouts_81*((year - youts_81) %in% 1 ))
            + I(yrouts_81*((year - youts_81) %in% 2 ))
            + I(yrouts_81*((year - youts_81) %in% 3 ))
            + I(yrouts_81*((year - youts_81)  %in% 4 ))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_r81.html")
ee[,c("yrouts_81","youts_81"):=NULL] #Deleting variables

#### Outsourcing including arrival effects ####

field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$youts_lsk,ee$yarr_lsk)==10000 | ee$year<pmin(ee$y2outs_lsk,ee$y2arr_lsk)) &
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$yarr_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_lsk) %in% c(-20:-5)==F

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I( ((year - youts_lsk) %in% c(-4)) |((year - yarr_lsk) %in% c(-4)) )
            + I( ((year - youts_lsk) %in% -3) |((year - yarr_lsk) %in% -3 ))
            + I( ((year - youts_lsk) %in% -2) | ((year - yarr_lsk) %in% -2 ))
            # + I( ((year - youts_lsk) %in% -1) | ((year - yarr_lsk) %in% -1 ))
            + I( ((year - youts_lsk) %in% 0) | ((year - yarr_lsk) %in% 0) )
            + I( ((year - youts_lsk) %in% 1) | ((year - yarr_lsk) %in% 1) )
            + I( ((year - youts_lsk) %in% 2) | ((year - yarr_lsk) %in% 2) )
            + I( ((year - youts_lsk) %in% 3) | ((year - yarr_lsk) %in% 3) )
            + I( ((year - youts_lsk)  %in% 4) |((year - yarr_lsk)  %in% 4))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_lsk_deparr.html")


field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$youts_lsk,ee$yarr_lsk)==10000 | ee$year<pmin(ee$y2outs_lsk,ee$y2arr_lsk)) &
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$yarr_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_lsk) %in% c(-20:-5)==F

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrouts_arr_lsk*(((year - youts_lsk) %in% c(-4)) |((year - yarr_lsk) %in% c(-4)) ))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% -3) |((year - yarr_lsk) %in% -3 )))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% -2) | ((year - yarr_lsk) %in% -2 )))
            # + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% -1) | ((year - yarr_lsk) %in% -1 )))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% 0) | ((year - yarr_lsk) %in% 0) ))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% 1) | ((year - yarr_lsk) %in% 1) ))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% 2) | ((year - yarr_lsk) %in% 2) ))
            + I(yrouts_arr_lsk*( ((year - youts_lsk) %in% 3) | ((year - yarr_lsk) %in% 3) ))
            + I(yrouts_arr_lsk*( ((year - youts_lsk)  %in% 4) |((year - yarr_lsk)  %in% 4)))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rlsk_deparr.html")

#### High skill outsourcing ####

ee$y2outs_hsk <- ave(ifelse(ee$year==ee$youts_hsk,10000,ifelse((ee$outs_hsk>0)*ee$year>0,(ee$outs_hsk>0)*ee$year,10000)),ee$est,FUN=min)
field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F & (ee$youts_hsk==10000 | ee$year<ee$y2outs_hsk) &
  (ee$year-ee$youts_hsk)<5 &
  (ee$year-ee$youts_hsk) %in% c(-20:-5)==F 
rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_hsk)  %in% -4 )
            + I((year - youts_hsk) %in% -3 )
            + I((year - youts_hsk) %in% -2 )
            # + I((year - youts_hsk) %in% -1 )
            + I((year - youts_hsk) %in% 0 )
            + I((year - youts_hsk) %in% 1 )
            + I((year - youts_hsk) %in% 2 )
            + I((year - youts_hsk) %in% 3 )
            + I((year - youts_hsk)  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
summary(dd)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_hsk.html")


ee$yrouts_hsk <- ave(ifelse(is.na(ee$depsize)==T | ee$depsize==0,0,ee$outs_hsk/ee$depsize)*(ee$youts_hsk==10000 | ee$year<ee$y2outs_lsk),ee$est,FUN=max)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
           + I(yrouts_hsk*((year - youts_hsk)  %in% -4 ))
           + I(yrouts_hsk*((year - youts_hsk) %in% -3 ))
           + I(yrouts_hsk*((year - youts_hsk) %in% -2 ))
           # + I((year - youts_hsk) %in% -1 )
           + I(yrouts_hsk*((year - youts_hsk) %in% 0 ))
           + I(yrouts_hsk*((year - youts_hsk) %in% 1 ))
           + I(yrouts_hsk*((year - youts_hsk) %in% 2 ))
           + I(yrouts_hsk*((year - youts_hsk) %in% 3 ))
           + I(yrouts_hsk*((year - youts_hsk)  %in% 4 ))
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
summary(dd)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rhsk.html")



field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F & (pmin(ee$youts_hsk,ee$youts_lsk)==10000 | ee$year<pmin(ee$y2outs_hsk,ee$y2outs_lsk)) &
  (ee$year-ee$youts_hsk)<5 &
  (ee$year-ee$youts_hsk) %in% c(-20:-5)==F &
  (ee$year-ee$youts_lsk)<5 &
  (ee$year-ee$youts_lsk) %in% c(-20:-5)==F 


rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - pmin(youts_hsk,youts_lsk))  %in% -4 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% -3 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% -2 )
            # + I((year - pmin(youts_hsk,youts_lsk)) %in% -1 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% 0 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% 1 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% 2 )
            + I((year - pmin(youts_hsk,youts_lsk)) %in% 3 )
            + I((year - pmin(youts_hsk,youts_lsk))  %in% 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
summary(dd)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_hsk_lsk.html")



rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk))  %in% -4 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% -3 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% -2 ))
           # + I((year - pmin(youts_hsk,youts_lsk)) %in% -1 )
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% 0 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% 1 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% 2 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk)) %in% 3 ))
           + I(pmax(yrouts_hsk,yrouts_lsk)*((year - pmin(youts_hsk,youts_lsk))  %in% 4 ))
           |est|0|firm, 
           weights=ee$f9010_w[field],
           data=ee[field ,])
summary(dd)
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rhsk_lsk.html")

#### WITH CUMULATIVE DUMMIES ####

rm(mm)
gc()
field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
        (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) # & 
       #  (ee$year-ee$youts_lsk)<5 &
       # (ee$year-ee$youts_lsk) %in% c(-20:-5)==F
rm(dd)  
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_lsk) <= -4 & (year - youts_lsk)>-20)
            + I((year - youts_lsk) %in% -3 )
            + I((year - youts_lsk) %in% -2 )
            # + I((year - youts_lsk) %in% -1 )
            + I((year - youts_lsk) %in% 0 )
            + I((year - youts_lsk) %in% 1 )
            + I((year - youts_lsk) %in% 2 )
            + I((year - youts_lsk) %in% 3 )
            + I((year - youts_lsk) >= 4 )
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_c_lsk_1.html")



field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (ee$youts_lsk==10000 | ee$year<ee$y2outs_lsk) # & 
#  (ee$year-ee$youts_lsk)<5 &
# (ee$year-ee$youts_lsk) %in% c(-20:-5)==F

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I((year - youts_lsk) <= -4 & (year - youts_lsk)>-20)
            + I((year - youts_lsk) %in% -3 )
            + I((year - youts_lsk) %in% -2 )
            # + I((year - youts_lsk) %in% -1 )
            + I((year - youts_lsk) %in% 0 )
            + I((year - youts_lsk) %in% 1 )
            + I((year - youts_lsk) %in% 2 )
            + I((year - youts_lsk) %in% 3 )
            + I((year - youts_lsk) >= 4 )
            + log(count)
            + lnnbwkrs_cumneg
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_c_lsk_size.html")
gc()




#### Subsidiarizing including arrival effects ####

field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$ydep_sub,ee$yarr_sub)==10000 | ee$year<pmin(ee$y2dep_sub,ee$y2arr_sub)) &
  (ee$year-ee$ydep_sub)<5 &
  (ee$year-ee$yarr_sub)<5 &
  (ee$year-ee$ydep_sub) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_sub) %in% c(-20:-5)==F

rm(dd)
table(field)

dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I( ((year - ydep_sub) %in% c(-4)) |((year - yarr_sub) %in% c(-4)) )
            + I( ((year - ydep_sub) %in% -3) |((year - yarr_sub) %in% -3 ))
            + I( ((year - ydep_sub) %in% -2) | ((year - yarr_sub) %in% -2 ))
            # + I( ((year - ydep_sub) %in% -1) | ((year - yarr_sub) %in% -1 ))
            + I( ((year - ydep_sub) %in% 0) | ((year - yarr_sub) %in% 0) )
            + I( ((year - ydep_sub) %in% 1) | ((year - yarr_sub) %in% 1) )
            + I( ((year - ydep_sub) %in% 2) | ((year - yarr_sub) %in% 2) )
            + I( ((year - ydep_sub) %in% 3) | ((year - yarr_sub) %in% 3) )
            + I( ((year - ydep_sub)  %in% 4) |((year - yarr_sub)  %in% 4))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_sub_deparr.html")


field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$ydep_sub,ee$yarr_sub)==10000 | ee$year<pmin(ee$y2dep_sub,ee$y2arr_sub)) &
  (ee$year-ee$ydep_sub)<5 &
  (ee$year-ee$yarr_sub)<5 &
  (ee$year-ee$ydep_sub) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_sub) %in% c(-20:-5)==F

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrdep_arr_sub*(((year - ydep_sub) %in% c(-4)) |((year - yarr_sub) %in% c(-4)) ))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% -3) |((year - yarr_sub) %in% -3 )))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% -2) | ((year - yarr_sub) %in% -2 )))
            # + I(yrdep_arr_sub*( ((year - ydep_sub) %in% -1) | ((year - yarr_sub) %in% -1 )))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% 0) | ((year - yarr_sub) %in% 0) ))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% 1) | ((year - yarr_sub) %in% 1) ))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% 2) | ((year - yarr_sub) %in% 2) ))
            + I(yrdep_arr_sub*( ((year - ydep_sub) %in% 3) | ((year - yarr_sub) %in% 3) ))
            + I(yrdep_arr_sub*( ((year - ydep_sub)  %in% 4) |((year - yarr_sub)  %in% 4)))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rsub_deparr.html")


#### Flow between flows of the same group including arrival effects ####



field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$ydep_int,ee$yarr_int)==10000 | ee$year<pmin(ee$y2dep_int,ee$y2arr_int)) &
  (ee$year-ee$ydep_int)<5 &
  (ee$year-ee$yarr_int)<5 &
  (ee$year-ee$ydep_int) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_int) %in% c(-20:-5)==F

rm(dd)
table(field)

dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I( ((year - ydep_int) %in% c(-4)) |((year - yarr_int) %in% c(-4)) )
            + I( ((year - ydep_int) %in% -3) |((year - yarr_int) %in% -3 ))
            + I( ((year - ydep_int) %in% -2) | ((year - yarr_int) %in% -2 ))
            # + I( ((year - ydep_int) %in% -1) | ((year - yarr_int) %in% -1 ))
            + I( ((year - ydep_int) %in% 0) | ((year - yarr_int) %in% 0) )
            + I( ((year - ydep_int) %in% 1) | ((year - yarr_int) %in% 1) )
            + I( ((year - ydep_int) %in% 2) | ((year - yarr_int) %in% 2) )
            + I( ((year - ydep_int) %in% 3) | ((year - yarr_int) %in% 3) )
            + I( ((year - ydep_int)  %in% 4) |((year - yarr_int)  %in% 4))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_int_deparr.html")


field <- ee$f9010_w>0 & is.na(ee$f9010_w)==F &
  (pmin(ee$ydep_int,ee$yarr_int)==10000 | ee$year<pmin(ee$y2dep_int,ee$y2arr_int)) &
  (ee$year-ee$ydep_int)<5 &
  (ee$year-ee$yarr_int)<5 &
  (ee$year-ee$ydep_int) %in% c(-20:-5)==F &
  (ee$year-ee$yarr_int) %in% c(-20:-5)==F

rm(dd)
dd <- felm(I(100*f9010xf9010) ~ factor(year) 
            + I(yrdep_arr_int*(((year - ydep_int) %in% c(-4)) |((year - yarr_int) %in% c(-4)) ))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% -3) |((year - yarr_int) %in% -3 )))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% -2) | ((year - yarr_int) %in% -2 )))
            # + I(yrdep_arr_int*( ((year - ydep_int) %in% -1) | ((year - yarr_int) %in% -1 )))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% 0) | ((year - yarr_int) %in% 0) ))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% 1) | ((year - yarr_int) %in% 1) ))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% 2) | ((year - yarr_int) %in% 2) ))
            + I(yrdep_arr_int*( ((year - ydep_int) %in% 3) | ((year - yarr_int) %in% 3) ))
            + I(yrdep_arr_int*( ((year - ydep_int)  %in% 4) |((year - yarr_int)  %in% 4)))
            |est|0|firm, 
            weights=ee$f9010_w[field],
            data=ee[field ,])
screenreg(list(dd),stars=c(0.1,0.05,0.01))
htmlreg(dd,stars=c(0.1,0.05,0.01),file="twfe_rint_deparr.html")
gc()